#' Add the town of each high school to the yearly merged files.
#'
#' For every element of `years` the function
#'   1. reads `data_merged_complete_<year>` (RDS) from `wd_data_intermediate`,
#'   2. drops rows that are entirely `NA`, resets `town_hs_bac` and calls
#'      `clean__get_town_hs(data, year)`,
#'   3. for every year except the first, fuzzy-matches the school name of rows
#'      whose town is still missing/blank against the schools seen in earlier
#'      years (same `judet_bac`, string distance <= 3) and copies the town,
#'   4. drops rows without `nume_bac` or `judet_bac` (and, for 2013 only, rows
#'      without a town), and
#'   5. writes `data_merged_complete_town_<year>` (RDS).
#'
#' `wd_code` and `wd_data_intermediate` are free variables that must exist in
#' the calling environment; they are not defined in this function. The working
#' directory is changed and NOT restored.
#' `clean__get_town_hs()` is presumably defined by `clean__get_town_hs_v2.R`,
#' which is evaluated inside this function -- verify against that file.
#'
#' @param years Vector of years to process. The first element seeds the
#'   school/town lookup; later elements are matched against it.
#' @return `NULL`, invisibly. The function is called for its side effects.
clean__get_town_hs__main <- function(years) {
  setwd(wd_code)
  # Kept as eval(parse(...)) rather than source(): parse(encoding =) only marks
  # the strings as UTF-8, it does not re-encode the file.
  eval(parse(file = 'clean__get_town_hs_v2.R', encoding = 'UTF-8'))

  # Helpers are created after the file above is evaluated so that it cannot
  # overwrite them.

  # Columns that identify one row of the yearly data.
  .hs_key_cols <- c(
    "an", "judet_bac", "nume_bac", "unitate_de_invatamant", "media",
    "liceu_repartizat_corresponding_to_unitate_de_invatamant",
    "media_la_admitere"
  )

  # Short school name: the text between the first pair of double quotes, or
  # the full name when there is no quoted part or the quoted part is empty.
  .hs_short_name <- function(unit_names) {
    unit_names <- as.character(unit_names)
    short_name <- stringi::stri_extract_first_regex(
      unit_names, '(?<=").*?(?=")'
    )
    # `NA | NA` cannot occur here: is.na() is TRUE whenever the comparison is
    # NA, so the mask is always TRUE/FALSE.
    use_full_name <- is.na(short_name) | short_name == ""
    short_name[use_full_name] <- unit_names[use_full_name]
    short_name
  }

  # Distinct (county, town, school) combinations of one year, tagged with the
  # year. The column is called `year.y` on purpose: after the fuzzy join every
  # lookup column ends in ".y" and can be dropped in one go.
  .hs_lookup_entries <- function(df, year) {
    entries <- unique(
      as.data.frame(df)[
        , c("judet_bac", "town_hs_bac", "unitate_de_invatamant"),
        drop = FALSE
      ]
    )
    entries$hs_name_temp <- .hs_short_name(entries$unitate_de_invatamant)
    entries$year.y <- rep(year, nrow(entries))
    entries
  }

  # Append new entries to the lookup, keep only rows with a known town, and
  # sort by county and town.
  .hs_lookup_merge <- function(lookup, entries) {
    lookup <- unique(rbind(lookup, entries))
    lookup <- lookup[!is.na(lookup$town_hs_bac), ]
    lookup[order(lookup$judet_bac, lookup$town_hs_bac), ]
  }

  # Fill `town_hs_bac` for `data[rows, ]` by fuzzy-matching the short school
  # name against lookup entries from the same county. Rows without a match
  # are left untouched; matched rows are moved to the end of `data`.
  .hs_fill_towns <- function(data, rows, lookup) {
    temp <- data[rows, ]
    temp$town_hs_bac <- NA
    temp$hs_name_temp <- .hs_short_name(temp$unitate_de_invatamant)

    # Only entries that actually carry a town are useful candidates; a blank
    # town would otherwise be able to win a distance tie and be written back.
    candidates <- lookup[
      which(!is.na(lookup$hs_name_temp) & trimws(lookup$town_hs_bac) != ""),
    ]

    counties <- unique(temp$judet_bac)
    counties <- counties[!is.na(counties)]
    joined <- bind_rows(lapply(counties, function(county) {
      county_candidates <- candidates[which(candidates$judet_bac == county), ]
      if (nrow(county_candidates) == 0L) {
        return(NULL) # nothing to match against in this county
      }
      stringdist_left_join(
        temp[which(temp$judet_bac == county), ],
        county_candidates,
        by = c(hs_name_temp = "hs_name_temp"),
        max_dist = 3,
        distance_col = "dist1"
      )
    }))

    # No county produced a distance column => nothing was matched.
    if (!"dist1" %in% colnames(joined)) {
      return(data)
    }

    # Keep the closest candidate per original row. which.min() returns
    # integer(0) when every distance is NA, which drops unmatched rows.
    best <- joined %>%
      group_by(
        an, judet_bac.x, town_hs_bac.x, nume_bac, unitate_de_invatamant.x,
        media, liceu_repartizat_corresponding_to_unitate_de_invatamant,
        media_la_admitere
      ) %>%
      slice(which.min(dist1)) %>%
      ungroup()

    # Take the town from the lookup side, then restore the original layout:
    # drop lookup columns (".y"), strip the ".x" suffix, drop work columns.
    best$town_hs_bac.x <- best$town_hs_bac.y
    best <- best[, !grepl("\\.y$", colnames(best))]
    colnames(best) <- sub("\\.x$", "", colnames(best))
    best <- best[, !colnames(best) %in% c("dist1", "hs_name_temp")]

    # Replace the matched rows in `data` with their repaired versions.
    data <- anti_join(data, best, by = .hs_key_cols)
    rbind(data, best)
  }

  # Lookup of schools and towns accumulated over the years processed so far.
  hs_names_and_locations <- data.frame(
    judet_bac = character(),
    town_hs_bac = character(),
    unitate_de_invatamant = character(),
    hs_name_temp = character(),
    year.y = numeric()
  )

  #2012000018

  # idea: instead of adding towns to all data, do it for unique
  # "unitate_de_invatamant", then merge
  # (earlier experiments also matched on the full `unitate_de_invatamant` and
  # on `liceu_repartizat_corresponding_to_unitate_de_invatamant`; they were
  # disabled and are not reproduced here)

  for (i in years) {
    print(i)
    setwd(wd_data_intermediate)
    data <- readRDS(paste0("data_merged_complete_", i))
    message(paste('Observations before:', nrow(data)))

    # Drop rows in which every column is NA.
    data <- data %>% filter_all(any_vars(!is.na(.)))
    data$town_hs_bac <- NA
    data <- clean__get_town_hs(data, i)
    print("town name added")

    if (i == years[1]) {
      # First year: only seed the lookup.
      hs_names_and_locations <- .hs_lookup_merge(
        hs_names_and_locations, .hs_lookup_entries(data, i)
      )
    } else {
      # Rows whose town is NA or blank and that have an assigned high school.
      # which() makes the selection NA-safe. A town that is NA or blank cannot
      # be 'NO HS MATCHED', so that comparison is not needed (and used to turn
      # the mask into NA for every NA town, silently excluding those rows).
      town <- data$town_hs_bac
      assigned_hs <-
        data$liceu_repartizat_corresponding_to_unitate_de_invatamant
      rows_without_town <- which(
        (is.na(town) | trimws(town) == "") &
          !is.na(assigned_hs) & assigned_hs != ''
      )

      # NOTE(review): as before, the lookup only grows in years that had at
      # least one row to repair -- confirm this is intended.
      if (length(rows_without_town) > 0L) {
        data <- .hs_fill_towns(data, rows_without_town, hs_names_and_locations)

        # add hs names to old hs names
        hs_names_and_locations <- .hs_lookup_merge(
          hs_names_and_locations, .hs_lookup_entries(data, i)
        )
      }
    }

    data <- data[!is.na(data$nume_bac) & !is.na(data$judet_bac), ]

    # 2013 only: rows without a town are dropped (year is hard-coded).
    if (i == 2013) {
      data <- data[!is.na(data$town_hs_bac), ]
    }

    # Save data
    setwd(wd_data_intermediate)
    saveRDS(data, file = paste0("data_merged_complete_town_", i))
    message(paste('Observations after:', nrow(data)))
  }

  invisible(NULL)
}